* Start from a clean session: clear memory, switch off output paging, and close
* any log that is still open (capture suppresses the error when none is open).
clear all
set more off
capture log close

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Treatment Effects Coaching: Sun and Abraham (2021) CATT analysis
********************************************************************************
***** 
********************************************************************************
***** Latest update: 04-01-2024
********************************************************************************
* Root folder of the replication package. The definitions below are left
* commented out, so $filepath is presumably set by a calling do-file (TODO
* confirm); uncomment and adapt them to run this file on its own.
*global filepath "C:/JPEMicReplication"
*global paperpath "$filepath/TablesGraphs"

* Stop with a clear message when the root folder is undefined; otherwise the
* log and data paths below would be built from an empty root.
if `"$filepath"' == "" {
	display as error "global filepath is not set; define it before running this do-file"
	exit 198
}

* Outcome stubs looped over in the estimation section (used as dep_<stub>):
* the first three share the ABC sample flag, the fourth uses the fuel sample flag.
local abcd "acceleratie rem bochten fueleconomyLpKM"
log using "$filepath/Logs/X04RegressionsCoachingCATTstatic.log", replace

/*** Notes ***/
*=> No fuel economy observations postfeedback in urban area.
*=> Fuel Economy: kilometers per liter of fuel
*=> ABC dimensions: number of events per 10 kilometers
/*************/

* Load the main analysis data. Forward slashes are used (as in the log path)
* because they work on Windows, macOS and Linux, whereas backslashes are
* Windows-only.
use "$filepath/DEPO/DataMainAnalysisDEPO.dta"

rename bustype bustypes
** Drop all Irisbus observations
drop if bustypes == 3

* Flag the ZH region before removing it. After the drop below ZH is 0 for all
* remaining observations; the variable is kept as in the original script.
gen byte ZH = (regio == "ZH")

** Analysis: based on Treatment region
drop if regio == "ZH"

* Treat a missing intouro indicator as 0
replace intouro = 0 if intouro == .

* Drop months with imperfect tracking by coaches (everything after 30 April 2016).
* Note: a missing datum compares as larger than any date and is dropped as well.
drop if datum > mdy(4, 30, 2016)

* Drop eco-coaches [chauf_nr_rug: randomly generated depository numbers!]
local eco_nr "939 1404 519 1286 1610 531"

* Turn the space-separated id list into a comma-separated one so that all
* eco-coaches are removed in a single pass over the data.
local eco_list : subinstr local eco_nr " " ", ", all
drop if inlist(chauf_nr_rug, `eco_list')

/*** Determine the set of observations used for the analysis ***/
* Condition shared by both sample flags: any of the common controls is missing.
local common_miss "geplande_ritafstand==. | lnovcheckins==. | punctuality==. | aantal_haltes==."

* regobsfuel = 1 when the common controls and the fuel-economy outcome are all observed.
gen byte regobsfuel = !(`common_miss' | dep_fueleconomyLpKM==.)

* regobsabc = 1 when the common controls and all three ABC outcomes are observed.
gen byte regobsabc = !(`common_miss' | dep_acceleratie==. | dep_bochten==. | dep_rem==.)


****************************************
*** A. Create global list of covariates
****************************************

* Bus-type indicators; vdl12 is the omitted reference category
* (more than 50 per cent of observations).
global covBusType vdl10 vdl14 iris10 iris10cng iris12 iris12cng intouro

* Trip and route characteristics.
global covEnvironm ochtendspits avondspits uitleenrit geplande_ritafstand aantal_haltes stadsrit

* Passenger check-in controls.
global covPassengers lnovcheckins ovcheckinsmissing

* Request a large matrix size for the estimations further below.
set matsize 10000

****************************************
* Definition variables for CATT analysis
****************************************
*   Code the cohort categorical variable based on when the individual was first coached, which will be inputted in cohort(varname).
* Cohort variable for cohort(varname): the week in which a driver is first
* observed as coached. weekindex is kept on post-coaching rows only and its
* minimum is spread over all rows of the driver; drivers without any
* post-coaching row get a missing value.
gen coachinghulp = weekindex if postcoaching == 1
bysort chauf_nr_rug: egen TimeFirstCoaching = min(coachinghulp)
drop coachinghulp

label variable TimeFirstCoaching "categorical variable that contains the initial treatment timing (week) of each unit. Set missing for never treated units"

* Control cohort: drivers whose first coaching date lies after 30 April 2016
* (the end of the sample window) or is missing.
gen byte never_coached = (coachdatum_1 > date("30-4-2016", "DMY")) | missing(coachdatum_1)
label variable never_coached "binary variable that corresponds to the control cohort, = 1 for never-treated units, 0 otherwise" 

* Relative time in weeks since the first coaching session (missing when
* TimeFirstCoaching is missing). Stored as int: a byte only holds -127..100, so
* longer leads or lags would silently be turned into missing values.
gen int rw = weekindex - TimeFirstCoaching


* Check if there is a sufficient number of treated units for each relative time.
* With very few units it might be better to bin the relative times and assume
* constant treatment effects within the bin.
tab rw

* Static post-coaching indicator. Stata treats missing as larger than any
* number, so (rw >= 0) alone would code every observation with a missing rw as
* post-coaching; the explicit guard keeps those observations at 0.
gen byte cp = (rw >= 0) & !missing(rw)
 
 
*****************************************************************************
** Estimation effect coaching + plots 
*****************************************************************************
* selectie marks the estimation sample of the outcome currently being processed.
gen byte selectie = 0

* Loop over the outcome stubs in local abcd; k is the position of the outcome
* and is used as suffix of the stored estimates (mSA<k>, mSAcov<k>).
local k = 0
foreach var of local abcd {
	local ++k

	* Outcomes 1-3 use the ABC sample flag, outcome 4 the fuel-economy flag.
	if `k' < 4 {
		replace selectie = regobsabc
	}
	else if `k' == 4 {
		replace selectie = regobsfuel
	}

	tab selectie never_coached

	* Driver-level counts: keep one row per driver, tabulate, then restore the data.
	preserve
		duplicates drop chauf_nr_rug, force
		tab never_coached
		* 110 drivers never coached.
		tab coachdatum_2
		* 21 drivers received a second session before or at April 30, 2016.
	restore

	* Interaction-weighted (IW) estimator of Sun and Abraham (2021), applied to
	* the single post-coaching indicator cp, i.e. a time-invariant effect of
	* coaching on dep_`var'.

	************************************************************************************************
	** Table 3: Time Invariant Effect In-Person Coaching on Driving Performance - Panel B **********
	************************************************************************************************
	* Note that Sun and Abraham (2021) only establish the validity of the IW
	* estimators for balanced panel data without covariates.

	* WITHOUT COVARIATES
	eventstudyinteract dep_`var' cp if selectie==1, ///
		cohort(TimeFirstCoaching) ///
		control_cohort(never_coached) ///
		absorb(i.chauf_nr_rug i.weekindex) ///
		vce(cluster chauf_nr_rug)
	estimates store mSA`k'

	* WITH COVARIATES
	eventstudyinteract dep_`var' cp if selectie==1, ///
		cohort(TimeFirstCoaching) ///
		covariates($covBusType $covPassengers $covEnvironm i.bustypes) ///
		control_cohort(never_coached) ///
		absorb(i.chauf_nr_rug i.weekindex) ///
		vce(cluster chauf_nr_rug)
	estimates store mSAcov`k'

}


log close


